/*** This do-file creates 3 binned scatter plots that show the association between
	 COVID-19 incidence, spending and time away from home, at the county level.
***/

*-------------------------------------------------------------------------------
* Set up
*-------------------------------------------------------------------------------

* Ask project for the root of the "figstabs" build; when r(buildrunning) is 0,
* pull in the interactive configuration instead
project figstabs, root
if (r(buildrunning) == 0) include "${root}/code/config_interactive.do"

* Register set_globals.do with project, then include it
project, uses("${root}/code/set_globals.do")
include "${root}/code/set_globals.do"

* Results category used for the output subfolders of this do-file
local category "COVID-19 Incidence"

* Output folders (capture suppresses any error, e.g. when the folder already exists)
capture mkdir "${root}/results/`category'"
capture mkdir "${root}/results/paper numbers"
capture mkdir "${root}/results/paper numbers/`category'"

*-------------------------------------------------------------------------------
* Load data
*-------------------------------------------------------------------------------

* COVID cases: the 2020 and 2021 county-level daily files are imported separately,
* parked in tempfiles, and then stacked (2020 rows first, 2021 rows second)
project, uses("${root}/data/web/data/COVID - County - Daily 2020.csv")
import delimited using "${root}/data/web/data/COVID - County - Daily 2020.csv", clear
tempfile covid_2020
save `covid_2020'

project, uses("${root}/data/web/data/COVID - County - Daily 2021.csv.gz")
gzimport delimited using "${root}/data/web/data/COVID - County - Daily 2021.csv.gz", clear
tempfile covid_2021
save `covid_2021'

use `covid_2020', clear
append using `covid_2021'
tempfile covid_cases
save `covid_cases'

* Spending: only the merge keys and spend_all are kept
project, uses("${root}/data/web/data/Affinity - County - Daily.csv")
import delimited using "${root}/data/web/data/Affinity - County - Daily.csv", clear
keep countyfips year month day spend_all
tempfile affinity
save `affinity'

* Google mobility becomes the master dataset; its gps* variables get a google_mob prefix
project, uses("${root}/data/web/data/Google Mobility - County - Daily.csv.gz")
gzimport delimited using "${root}/data/web/data/Google Mobility - County - Daily.csv.gz", clear
rename gps* google_mob*

* Attach cases and spending by county-day (unmatched rows from either side are kept)
merge 1:1 year month day countyfips using `covid_cases', nogenerate
merge 1:1 year month day countyfips using `affinity', nogenerate

* Daily date built from the month/day/year components
gen date = mdy(month, day, year)
format %td date

* Multiply both outcomes by 100 so they are expressed in percentage terms
foreach outcome of varlist google_mob_away_from_home spend_all {
	replace `outcome' = `outcome' * 100
}

*-------------------------------------------------------------------------------
* Collapse to county level
*-------------------------------------------------------------------------------
* Restrict the sample to 25 March 2020 through 14 April 2020 (inclusive)
keep if inrange(date, td(25mar2020), td(14apr2020))

* Set time away from home to missing on weekends (dow() is 0 on Sunday, 6 on Saturday),
* so that weekend days do not enter the county mean below
replace google_mob_away_from_home = . if inlist(dow(date), 0, 6)

* One row per county: means of the case rate, both mobility measures and spending
gcollapse (mean) case_rate google_mob_away_from_home google_mob_residential spend_all, by(countyfips)

* Attach the ACS 2014-2018 county covariates; counties found only in the ACS file are dropped
rename countyfips county_fips
project, uses("${root}/data/derived/ACS 2014-2018 5-Year County/ACS 2014-2018 County.dta")
merge 1:1 county_fips using "${root}/data/derived/ACS 2014-2018 5-Year County/ACS 2014-2018 County.dta", keep(master match)
rename medhhinc_2014_2018_est med_hhinc_2018
rename pop_2014_2018_est pop_2018

* Population-weighted quartiles of county median household income
xtile inc_quartile = med_hhinc_2018 [w = pop_2018], nquantiles(4)

* Natural log of the mean case rate (missing when case_rate is zero or missing)
gen l_case_rate = ln(case_rate)

*-------------------------------------------------------------------------------
* Plot binscatter of spending changes vs. COVID case rate
*-------------------------------------------------------------------------------

* Slope of spending on log case rate, estimated separately for the bottom (1) and
* top (4) income quartiles, population-weighted with robust standard errors.
* Coefficients and standard errors are stored as 2-decimal strings for the labels below.
foreach quartile in 1 4 {
	reg spend_all l_case_rate if inc_quartile == `quartile' [w = pop_2018],  r
	local coef_`quartile' : di %4.2f _b[l_case_rate]
	local se_`quartile' : di %4.2f _se[l_case_rate]
}

* Create binscatter (colour version), restricted to quartiles 1 and 4
binscatter spend_all l_case_rate ///
	[w = pop_2018] ///
	if inlist(inc_quartile, 1, 4) ///
	, ///
	by(inc_quartile) ///
	ytitle("Change in Consumer Spending (%)" "from January to April 2020") ///
	xtitle("County-level COVID-19 Cases Per 100,000 People (Log Scale)") ///
	ylabel(-40 "-40%" -35 "-35%" -30 "-30%" -25 "-25%", nogrid) ///
	${title_`version'} ///
	xlabel(1.61 "5" 3 "20" 5.01 "150" 7 "1100") ///
	xscale(range(1.2 7)) ///
	yscale(range(-40 -25)) ///
	text(-25.5 5.5 "Low-Income Counties (Q1)" "Slope = `coef_1' (s.e. = `se_1')", color(oi1) size(medsmall)) ///
	text(-37.5 2.3 "High-Income Counties (Q4)" "Slope = `coef_4' (s.e. = `se_4')", color(oi2) size(medsmall)) ///
	legend(off)
oi_graph_export "${root}/results/COVID-19 Incidence/Spending changes vs COVID case rate", type(${fig_type})

* Black and white version for QJE (same plot, greyscale colours and distinct marker symbols)
binscatter spend_all l_case_rate ///
	[w = pop_2018] ///
	if inlist(inc_quartile, 1, 4) ///
	, ///
	by(inc_quartile) colors(gs0 gs8) msymbols(O T) ///
	ytitle("Change in Consumer Spending (%)" "from January to April 2020") ///
	xtitle("County-level COVID-19 Cases Per 100,000 People (Log Scale)") ///
	ylabel(-40 "-40%" -35 "-35%" -30 "-30%" -25 "-25%", nogrid) ///
	${title_`version'} ///
	xlabel(1.61 "5" 3 "20" 5.01 "150" 7 "1100") ///
	xscale(range(1.2 7)) ///
	yscale(range(-40 -25)) ///
	text(-25.5 5.5 "Low-Income Counties (Q1)" "Slope = `coef_1' (s.e. = `se_1')", color(gs0) size(medsmall)) ///
	text(-37.5 2.3 "High-Income Counties (Q4)" "Slope = `coef_4' (s.e. = `se_4')", color(gs8) size(medsmall)) ///
	legend(off)

* The black-and-white figure is written to its own folder, which this do-file does
* not otherwise create; make sure it exists so graph export cannot fail on a
* missing directory (capture ignores the error when the folder is already there)
cap mkdir "${root}/results/QJE_Figures_BlackAndWhite"
graph export "${root}/results/QJE_Figures_BlackAndWhite/Figure_2.svg", replace
project, creates("${root}/results/QJE_Figures_BlackAndWhite/Figure_2.svg")

* Output numbers: start from a fresh YAML file, then write the four estimates to it
local yaml_file "${root}/results/paper numbers/`category'/Change in Consumer Spending vs COVID Case Rate.yaml"
cap erase "`yaml_file'"

yamlout using "`yaml_file'", ///
	key("covid_low_slope") ///
	comment("Low income counties slope") ///
	value(`coef_1') fmt(%9.2f)

yamlout using "`yaml_file'", ///
	key("covid_low_se") ///
	comment("Low income counties SE") ///
	value(`se_1') fmt(%9.2f)

yamlout using "`yaml_file'", ///
	key("covid_high_slope") ///
	comment("High income counties slope") ///
	value(`coef_4') fmt(%9.2f)

yamlout using "`yaml_file'", ///
	key("covid_high_se") ///
	comment("High income counties SE") ///
	value(`se_4') fmt(%9.2f)

project, creates("`yaml_file'")

*-------------------------------------------------------------------------------
* Plot binscatter of time outside home vs. COVID case rate
*-------------------------------------------------------------------------------

* Quartile 1 and quartile 4 slopes of time away from home on log case rate
* (population-weighted, robust standard errors), kept as 2-decimal strings
foreach q of numlist 1 4 {
	reg google_mob_away_from_home l_case_rate if inc_quartile == `q' [w = pop_2018], r
	local slope_q`q' : di %4.2f _b[l_case_rate]
	local sterr_q`q' : di %4.2f _se[l_case_rate]
}

* Binned scatter for quartiles 1 and 4, annotated with the slopes estimated above.
* NOTE(review): xscale(range()) is specified twice below with different ranges
* (1.3-7 and 3-7); presumably only one of them is intended - confirm against the
* rendered figure before removing either.
binscatter google_mob_away_from_home l_case_rate ///
	if inlist(inc_quartile, 1, 4) ///
	[w = pop_2018] ///
	, ///
	by(inc_quartile) ///
	ytitle("Change in Time Spent Away From Home (%)" "from January to April 2020") ///
	xtitle("County-level COVID-19 Cases Per 100,000 People (Log Scale)") ///
	yscale(range(-35 -18)) ///
	xscale(range(1.3 7)) ///
	ylabel(-35 "-35%" -30 "-30%" -25 "-25%" -20 "-20%" -15 "-15%", nogrid ) ///
	${title_`version'} ///
	legend(off) ///
	text(-21 6.1 "Low-Income Counties (Q1)" "Slope = `slope_q1' (s.e. = `sterr_q1')", color(oi1) size(medsmall)) ///
	text(-32 2.3 "High-Income Counties (Q4)" "Slope = `slope_q4' (s.e. = `sterr_q4')", color(oi2) size(medsmall)) ///
	xlabel(1.61 "5" 3 "20" 5.01 "150" 7 "1100") xscale(range(3 7))

oi_graph_export "${root}/results/COVID-19 Incidence/Time outside home vs COVID case rate", type(${fig_type})

* Paper numbers: remove any earlier YAML file, then write the four estimates to it
local numbers_file "${root}/results/paper numbers/`category'/Change in Time Away from Home vs COVID Case Rate.yaml"
cap erase "`numbers_file'"

yamlout using "`numbers_file'", ///
	key("home_low_slope") comment("Low income counties slope") ///
	value(`slope_q1') fmt(%9.2f)

yamlout using "`numbers_file'", ///
	key("home_low_se") comment("Low income counties SE") ///
	value(`sterr_q1') fmt(%9.2f)

yamlout using "`numbers_file'", ///
	key("home_high_slope") comment("High income counties slope") ///
	value(`slope_q4') fmt(%9.2f)

yamlout using "`numbers_file'", ///
	key("home_high_se") comment("High income counties SE") ///
	value(`sterr_q4') fmt(%9.2f)

project, creates("`numbers_file'")

*-------------------------------------------------------------------------------
* Plot binscatter of time outside home vs. median income
*-------------------------------------------------------------------------------

* Population-weighted regression of time away from home on median household income
* over all counties (robust standard errors); the slope and its standard error are
* multiplied by 1000, i.e. expressed per 1,000 dollars of income
reg google_mob_away_from_home med_hhinc_2018 [w = pop_2018], r
local slope_per_1k : di %4.2f 1000 * _b[med_hhinc_2018]
local se_per_1k : di %4.2f 1000 * _se[med_hhinc_2018]

* Binned scatter over all counties, annotated with the scaled slope.
* NOTE(review): the text() label contains a dollar sign directly followed by 1000,
* which Stata may treat as a global macro reference - confirm the label renders
* as intended in the exported figure.
binscatter google_mob_away_from_home med_hhinc_2018 ///
	[w = pop_2018] ///
	, ///
	ytitle("Change in Time Spent Away From Home (%)" "from January to April 2020") ///
	xtitle("Median Household Income in 2014-2018 ($)") ///
	yscale(range(-32 -18)) ///
	ylabel(-30 "-30%" -25 "-25%" -20 "-20%" -15 "-15%", nogrid ) ///
	${title_`version'} ///
	xlabel(40000(20000)100000, format(%9.0gc)) ///
	text(-32 57000 "Slope = `slope_per_1k'%/$1000 (s.e. = `se_per_1k')", color(gs8) size(medlarge))
oi_graph_export "${root}/results/COVID-19 Incidence/Time outside home vs Median Income", type(${fig_type})

* Paper numbers: remove any earlier YAML file, then write the slope and its standard error
local numbers_file "${root}/results/paper numbers/`category'/Change in Time Away from Home vs Median Income.yaml"
cap erase "`numbers_file'"

yamlout using "`numbers_file'", ///
	key("home_inc_slope") comment("Slope (%/1000$)") ///
	value(`slope_per_1k') fmt(%9.2f)
yamlout using "`numbers_file'", ///
	key("home_inc_se") comment("SE") ///
	value(`se_per_1k') fmt(%9.2f)

project, creates("`numbers_file'")
